************************
* The limits of ‘Western’ supply chain sustainability governance to halt deforestation
* Lead Author: Christoph Kubitza
* Email: Christoph.Kubitza@giga-hamburg.de
* Date: 03.7.2025

***********************
* GENERATING MERGED DATASET
************************

clear all
set more off
version 16

global LMI "C:\Users\christoph.kubitza\OneDrive - GIGA\Land Matrix\General\Stata_LM\Beta Data"
cd "C:\Users\christoph.kubitza\OneDrive - GIGA\Working Paper\CK Oil palm investors\Data\"

*Import main dataset from script 2
import dbase "03_temp/final/Intersection_Bl_C_stats_histo5_wgs84_centroids_p2_r.dbf", clear

*Rename and order
      rename unique_id pixel_id
      *Unnecessary var from different layer
      drop unique_id_ objectid_1 layer path
      *Correct unit is m2
      rename area_ha pixarea_m2
      *Empty var that emerged during an early merge of layers
      drop area_ha_ne area_ha__2 area_ha__1 area_ha__3  
      order CNTRY_NAME ADMIN_NAME CNTRY_CODE IPUM2000 REGY2000 PARENT poly_id pixel_id pixarea_m2, before( GP_LEGAL_S)
      rename *, lower
      rename parent prov2000
      rename gaez_mean att_yield 
      rename road_mean road_acc
      rename srtm_mean srtm_alti
      drop road_sum road_count gaez_sum gaez_count ccover_mea

      *RSPO deal
      replace rspo_deal=0 if rspo_deal==.
      *Correct the overlap
      bysort poly_id: asgen rspo_poly_id = rspo_deal 
      replace rspo_deal=1 if rspo_poly_id>0.90
      replace rspo_deal=0 if rspo_poly_id<0.25
   
**Generate variables
*GFC Tree Loss
      *relate band numbers to correct years
      rename treeloss* treeloss200*
      rename treeloss2001* treeloss201*
      rename treeloss2002* treeloss202*
      rename treeloss201 treeloss2001
      rename treeloss202 treeloss2002
      rename treeloss2000 gfc_notreeloss
      egen treeloss0121=rowtotal(treeloss*)
      *Aggregate treeloss for time periods
      egen treeloss0109=rowtotal(treeloss2001-treeloss2009)
      egen treeloss0111=rowtotal(treeloss2001-treeloss2011)
      egen treeloss0110=rowtotal(treeloss2001-treeloss2010)
      egen treeloss0108=rowtotal(treeloss2001-treeloss2008)
      egen treeloss0921=rowtotal(treeloss2009-treeloss2021)
      egen treeloss1121=rowtotal(treeloss2011-treeloss2021)
      egen treeloss1221=rowtotal(treeloss2012-treeloss2021)
      egen treeloss1021=rowtotal(treeloss2010-treeloss2021)
      egen treeloss_totalpixel=rowtotal(treeloss0121 gfc_notreeloss)
      *Calculate share of tree loss in each pixel
      foreach var of varlist treeloss1121 treeloss1221 treeloss0110 treeloss0121 treeloss0108 treeloss0921 treeloss1021  {
      gen `var'_share= `var'/ treeloss_totalpixel
      }
      rename treeloss* gfc_treeloss*
      order gfc_treeloss*, after(gfw_global)

      *Forestshare
      rename ccover_sum canopycover2000_sum
      rename ccover_cou canopycover2000_count
      order canopycover2000_*, after(gfw_global)
      bysort poly_id: egen gfc_treeloss0121_deal=sum(gfc_treeloss0121)
      bysort poly_id: egen gfc_treeloss_totalpixel_deal=sum(gfc_treeloss_totalpixel)
      gen gfc_treeloss0121_deal_share= gfc_treeloss0121_deal/ gfc_treeloss_totalpixel_deal
      *Forest cover 2010
      gen gfc_forestcover2010=canopycover2000_sum- gfc_treeloss0109
      gen gfc_forestcover2011=canopycover2000_sum- gfc_treeloss0110
      gen gfc_forestcover2012=canopycover2000_sum- gfc_treeloss0111
      *Treeloss can be higher than inital forestcover due to regrowth and second def events, we set this pixels to zero
      replace gfc_forestcover2010=0 if gfc_forestcover2010<0 & gfc_forestcover2010!=.
      replace gfc_forestcover2011=0 if gfc_forestcover2011<0 & gfc_forestcover2011!=.
      replace gfc_forestcover2012=0 if gfc_forestcover2012<0 & gfc_forestcover2012!=.
      gen gfc_forestcover2010_share=gfc_forestcover2010/canopycover2000_count
      gen gfc_forestcover2011_share=gfc_forestcover2011/canopycover2000_count
      gen gfc_forestcover2012_share=gfc_forestcover2012/canopycover2000_count
      gen gfc_forestcover2000_share=canopycover2000_sum/canopycover2000_count


*TMF JRC 2023
      rename jrc* tmf*
      egen tmf_totalpixel=rowtotal(tmf_0-tmf_2021)
      egen tmf_treeloss0021=rowtotal(tmf_2000-tmf_2021)
      egen tmf_treeloss1121=rowtotal(tmf_2011-tmf_2021)
      egen tmf_treeloss1221=rowtotal(tmf_2012-tmf_2021)
      egen tmf_treeloss1321=rowtotal(tmf_2013-tmf_2021)
      egen tmf_treeloss0010=rowtotal(tmf_2000-tmf_2010)
      egen tmf_treeloss0008=rowtotal(tmf_2000-tmf_2008)
      egen tmf_treeloss0921=rowtotal(tmf_2009-tmf_2021)
      egen tmf_treeloss1021=rowtotal(tmf_2010-tmf_2021)
      foreach var of varlist tmf_treeloss1121 tmf_treeloss1221 tmf_treeloss1321 tmf_treeloss0010 tmf_treeloss0021 tmf_treeloss0008 tmf_treeloss0921 tmf_treeloss1021 {
      gen `var'_share= `var'/ tmf_totalpixel
      }
      forvalues i=1987/2021 {
      egen tmf_treeloss1987`i'=rowtotal(tmf_1987- tmf_`i')
      gen tmf_forest_share_`i'= 1 - tmf_treeloss1987`i'/ tmf_totalpixel
      }

      *Gen var on deal level
      egen tmf_treeloss8721=rowtotal(tmf_1987-tmf_2021)
      bysort poly_id: egen tmf_treeloss8721_deal=sum(tmf_treeloss8721)
      bysort poly_id: egen tmf_totalpixel_deal=sum(tmf_totalpixel)
      gen tmf_treeloss8721_share_deal= tmf_treeloss8721/tmf_totalpixel_deal

      rename tmf_2* tmf_treeloss2*
      rename tmf_1* tmf_treeloss1*
      rename tmf_0 tmf_notreeloss
      order tmf_treeloss* tmf_notreeloss tmf_totalpixel_deal tmf_totalpixel, after(gfc_forestcover2000_share)

*OP Danylo
      *relate band numbers to correct years
      rename op_year* op_year_*
      rename op_year_3* op_year201*
      rename op_year_2* op_year200*
      rename op_year_1* op_year199*
      rename op_year_* op_year198*
      rename op_year1980 op_noyeardata
      destring regy2000, replace
      *No Danylo data for Papua, set value to missing
      foreach var of varlist op_year* {
      replace `var'=. if regy2000>9000
      }
      egen op8410=rowtotal(op_year1987-op_year2010)
      egen op8499=rowtotal(op_year1987-op_year1999)
      egen op0110=rowtotal(op_year2001-op_year2010)
      egen op0010=rowtotal(op_year2000-op_year2010)
      egen op0008=rowtotal(op_year2000-op_year2008)
      egen op1117=rowtotal(op_year2011-op_year2017)
      egen op1217=rowtotal(op_year2013-op_year2017)
      egen op1317=rowtotal(op_year2013-op_year2017)
      egen op1417=rowtotal(op_year2014-op_year2017)
      egen op0212=rowtotal(op_year2002-op_year2012)  
      egen op0217=rowtotal(op_year2002-op_year2017)                                 
      egen op8417=rowtotal(op_year*)        
      egen op_totpixel=rowtotal(op_year* op_noyeardata) 
      rename op_year* op_exp*
      foreach var of varlist op8410 op8499 op0110 op0010 op1117 op1317 op0212 op1417 op0217 op0008 op1217  {
      replace `var'=. if regy2000>9000
      }
      foreach var in op0212 op1117 op1317 op1417 op0110 op8499 op0217 op0008 op1217 {
      gen `var'_share = `var' / op_totpixel
      }

      *Gen var on deal level
      bysort poly_id: egen op8499_deal=sum(op8499)
      bysort poly_id: egen op_totpixel_deal=sum(op_totpixel)
      gen op8499_share_deal= op8499_deal/ op_totpixel_deal
      bysort poly_id: egen op8417_deal=sum(op8417)
      gen op8417_share_deal=op8417_deal/ op_totpixel_deal
      gen op2010_share= op8410/ op_totpixel

      forvalues i=1987/2017 {
      egen op1987_`i'=rowtotal(op_exp1987-op_exp`i')
      gen opshare_`i'= op1987_`i'/ op_totpixel
      }
      forvalues i=1987/2017 {
      gen opncumshare_`i'= op_exp`i'/op_totpixel
      }
      order op* , after(tmf_forest_share_2021)
      order operat_com, after(parent_com)
      
*Forest fire data
      rename fire_0 fire_nodata
      egen fire_0110=rowtotal(fire_1-fire_10)
      egen fire_1117=rowtotal(fire_11-fire_17)
      egen fire_1121=rowtotal(fire_11-fire_21)
      egen fire_1221=rowtotal(fire_12-fire_21)
      egen fire_1321=rowtotal(fire_13-fire_21)
      egen fire_1120=rowtotal(fire_11-fire_20)
      egen fire_1220=rowtotal(fire_12-fire_20)
      egen fire_1320=rowtotal(fire_13-fire_20)
      egen fire_1021=rowtotal(fire_10-fire_21)
      egen fire_totpixel=rowtotal(fire_* fire_nodata) 
      foreach var of varlist fire_1117 fire_0110 fire_1120 fire_1121 fire_1221 fire_1321 fire_1220 fire_1320 fire_1021 {
      gen `var'_share= `var'/fire_totpixel     
      }
      order fire* , after(tmf_forest_share_2021)

*Gaveau data
      *relate band numbers to correct years
      rename gav_1 gav_2009
      rename gav_2  gav_2005
      rename gav_3  gav_2011
      rename gav_4  gav_2001
      rename gav_5  gav_2012
      rename gav_6  gav_2014
      rename gav_7 gav_2013
      rename gav_8 gav_2018
      rename gav_9  gav_2010
      rename gav_10 gav_2015
      rename gav_11 gav_2019
      rename gav_12 gav_2006
      rename gav_13 gav_2000
      rename gav_14 gav_2003
      rename gav_15 gav_2016
      rename gav_16 gav_2007
      rename gav_17 gav_2002
      rename gav_18 gav_2004
      rename gav_19 gav_2017
      rename gav_20 gav_2008
      rename gav_nodata gavnodata
      egen gav_0019=rowtotal(gav_*)
      order gav_2000 gav_2001 gav_2002 gav_2003 gav_2004 gav_2005 gav_2006 gav_2007 gav_2008 gav_2009 gav_2010 gav_2011 gav_2012 gav_2013 gav_2014 gav_2015 gav_2016 gav_2017 gav_2018 gav_2019, after(tmf_forest_share_2021)
      egen gav_0010=rowtotal(gav_2000-gav_2010)
      egen gav_1119=rowtotal(gav_2011-gav_2019)
      egen gav_1219=rowtotal(gav_2012-gav_2019)
      egen gav_0919=rowtotal(gav_2009-gav_2019)
      egen gav_0008=rowtotal(gav_2000-gav_2008)
      egen gav_1019=rowtotal(gav_2010-gav_2019)
      egen gav_totalpixel=rowtotal(gav_0019 gavnodata)
      gen gav_1119_share= gav_1119/ gav_totalpixel
      gen gav_1219_share= gav_1219/ gav_totalpixel
      gen gav_0010_share= gav_0010/ gav_totalpixel
      gen gav_0019_share= gav_0019/ gav_totalpixel
      gen gav_0919_share= gav_0919/ gav_totalpixel
      gen gav_0008_share= gav_0008/ gav_totalpixel
      gen gav_1019_share= gav_1019/ gav_totalpixel


      forvalues i=2000/2019 {
      egen gav_2000_`i'=rowtotal(gav_2000-gav_`i')
      gen gavshare_`i'= gav_2000_`i'/gav_totalpixel
      }

      forvalues i=2000/2019 {
      gen gavncumshare_`i'= gav_`i'/gav_totalpixel
      }

      order gav*, after(tmf_forest_share_2021)


*Peatland data
      rename distance distance_p
      rename dist_in distance_w
      gen peat_d=1 if distance_p==0
      replace peat_d=0 if distance_p>0 & distance_p!=.
      gen peat_dis_km= distance_p/1000
      gen distance_insidepeat_km= (distance_w/1000)*(-1)
      replace peat_dis_km= distance_insidepeat_km if peat_dis_km==0
      drop distance_p distance_w distance_insidepeat_km
      order att_yield srtm_alti road_acc , after( peat_dis_km)


**Merge LMI data
      *Prepare dataset
      rename deal_id dealid
      *These domestic deals were deleted from LMI platform due to lack of data
      replace dealid=. if dealid==8327
      replace dealid=. if dealid==8328
      replace dealid=. if dealid==8329
      replace dealid=. if dealid==8564
      *Deal was wrongly matched as 8572 has only an approx location
      replace dealid=8533 if dealid==8572
      *This deal was not linked in the spatial dataset
      replace dealid=7987 if operat_com=="PT Tapian Nadenggan"

      *Merge LMI data
      merge m:1 dealid using "$LMI\download20230919\deals20230919.dta",  keep(mat mas) keepus(dealscope operatingcompanyname size_contract_mod contractdate* investorname* Country_Inv* Country_Inv* investorname*) nogen

      *Found a new deal and manually update here and not the GIS file
      *IN 2017, MP Evans announced it has agreed to acquire Sunrich Plantations Pte Ltd from SGP - which owns PT Bumi Mas Agro,.
      replace dealid=10140 if poly_id==220
      replace investorname1="M.P. Evans Group PLC"  if poly_id==220
      replace Country_Inv1="GBR"   if poly_id==220
      *New deal
      replace dealid=10141 if poly_id==71
      replace investorname1="NPC Resources Berhad"  if poly_id==71
      replace Country_Inv1="MYS"   if poly_id==71
      *New deal
      replace dealid=10143 if poly_id==130
      replace investorname1="Kencana Agri Limited"  if poly_id==130
      replace Country_Inv1="SGP"   if poly_id==130
      *New deal
      replace dealid=3873 if poly_id==359
      replace investorname1="Kencana Agri Limited"  if poly_id==359
      replace Country_Inv1="SGP"   if poly_id==359

      *update manually as long as LMI is not up to date
      *#9900: On 3August 2018, the Kualar Lumpur Kepong Company had completed the acquisition of 95% equity interest in PT Putra Bongan Jaya, which has been owned by the R.E.A. Holding before, for a cash consideration of RM58,819,00
      replace investorname1="REA" if dealid==9900
      replace Country_Inv1="GBR" if dealid==9900 
      * Data in LMI (maybe not yet active) but not yet in our file   
      replace investorname1="Sime Darby" if dealid==8555
      replace Country_Inv1="IDN" if dealid==8555 
      * Data in LMI (maybe not yet active) but not yet in our file   
      replace investorname1="Kuala Lumpur Kepong (KLK)" if dealid==9920
      replace Country_Inv1="MYS" if dealid==9920 
      *New information on deal in 2024
      replace investorname1="DTK Opportunity Ltd" if dealid==8212
      replace Country_Inv1="VGB" if dealid==8212 


**Generate investor var
      *Correct entries
      replace Country_Inv1="GBR" if parent_com=="REA"  
      replace investorname1="R.E.A. Holdings PLC" if parent_com=="REA"  
      replace Country_Inv2="SGP" if parent_com=="Wijaya family/Sinar Mas/GAR"  
      replace investorname2="Golden Agri Resources" if parent_com=="Wijaya family/Sinar Mas/GAR"  
      replace Country_Inv2="SGP" if investorname1=="Sinar Mas Group" 
      replace investorname2="Golden Agri Resources" if investorname1=="Sinar Mas Group"  
      
      *Investors high inc
      gen investor_highinc=0
      forvalues i=1/10 {
      replace investor_highinc=1 if Country_Inv`i'=="GBR" |  Country_Inv`i'=="KOR"  |  Country_Inv`i'=="SGP"  |  Country_Inv`i'=="USA"
      }
      
      *This deal is linked to a very complicated network of investors - one of them is silvery limited in SGP but little info to verify, we set deal to zero as no major trader
      replace investor_highinc=0 if dealid== 8219
      * First Resources is in the end owned by Fangiano family that is from Indonesia and deeply rooted in local politics
      gen investor_highinc_wSGP=investor_highinc
      replace investor_highinc_wSGP=0 if investorname1=="First Resources"

      *Without Singapure
      gen investor_highinc_woSGP=0
      forvalues i=1/10 {
      replace investor_highinc_woSGP=1 if Country_Inv`i'=="GBR" |  Country_Inv`i'=="KOR"  |  Country_Inv`i'=="USA"
      }

      *Without Asia
      gen investor_west=0
      forvalues i=1/10 {
      replace investor_west=1 if Country_Inv`i'=="GBR" |  Country_Inv`i'=="USA"
      }

      *Gen inv name
      gen investorname_highinc=investorname1
      replace investorname_highinc=investorname2 if  investorname2=="Golden Agri Resources"
      replace investorname_highinc="POSCO International" if investorname_highinc=="POSCO"
      replace investorname_highinc="Golden Agri Resources" if investorname_highinc=="Sinar Mas Group"
      gen investorctry_highinc=Country_Inv1
      replace investorname_highinc="" if investor_highinc_wSGP==0

*Merge downstream comp
      merge m:1 poly_id using "01_raw\Supply chain disclosure\Data\mills_downstreamcomp.dta", keep(mat mas) keepus(downstreamc_beiersdorf downstreamc_ferrero downstreamc_nestle downstreamc_pg downstreamc_unilever downstreamc_cargill num_downstream_c mill_data)  
      gen num_downstream_c_dum=1 if num_downstream_c>0 & num_downstream_c!=.
      replace num_downstream_c_dum=0 if num_downstream_c==0
      foreach var of varlist downstreamc* {
      replace `var'=0 if `var'==.
      }
      rename mill_data mill_data_pom

*Merge POMs
      merge m:1 poly_id using "01_raw\Palm Watch\data\Concessions_mills_pw_collapsed.dta", keep(mat mas) keepus(rspo_mill wsupply_2017 wsupply_2018 wsupply_2019 wsupply_2020 wsupply_2021 mill_scdata w_supply1721) nogen
      gen w_supply1721_dum=1 if w_supply1721>0 & w_supply1721!=.
      replace w_supply1721_dum=0 if  w_supply1721==0
      gen w_supply1721_condum=1 if wsupply_2017>0 & wsupply_2018>0 & wsupply_2019>0 & wsupply_2020>0 & wsupply_2021>0 & wsupply_2021!=.
      gen w_supply1720_dum=1 if (wsupply_2017>0 & wsupply_2017!=.) | (wsupply_2018>0 & wsupply_2018!=.) | (wsupply_2019>0 & wsupply_2019!=.) | (wsupply_2020>0 & wsupply_2020!=.)
      replace w_supply1720_dum=0 if w_supply1720_dum==.
      replace w_supply1721_condum=0 if w_supply1721_condum==.
      gen wsupply_2020_dum=1 if wsupply_2020>0 & wsupply_2020!=.
      replace wsupply_2020_dum=0 if wsupply_2020_dum==.
      gen wsupply_2021_dum=1 if wsupply_2021>0 & wsupply_2021!=.
      replace wsupply_2021_dum=0 if wsupply_2021_dum==.
      
**Labels
      label var gfc_treeloss1121_share "Share of area deforested 2011-21 (GFC)"
      label var gfc_treeloss0110_share "Share of area deforested 2001-10 (GFC)"
      label var gfc_treeloss1221_share "Share of area deforested 2012-21 (GFC)"
      label var op1117_share "Share of area planted with oil palm 2011-17"
      label var peat_d "Within peatland (=1)"
      label var investor_highinc "HIC investor (=1)"
      label var srtm_alti "Altitude (m)"
      label var road_acc "Accessibility Index"
      label var att_yield "Attainable yield for oil palm (kg/ha)"
      label var gfc_forestcover2012_share "Share of area forested in 2012 (GFC)"
      label var gfc_forestcover2011_share "Share of area forested in 2011 (GFC)"
      label var gfc_forestcover2000_share "Share of area forested in 2000 (GFC)"
      label var op2010_share "Share of area planted with oil palm 2010"

*Gen squared controls
      foreach var of varlist gavshare_2010 gavshare_2011 gavshare_2000 gavshare_2009 opshare_2013 opshare_2010 opshare_2001 opshare_2012 opshare_2000 opshare_2011  gfc_forestcover2000_share gfc_forestcover2011_share gfc_forestcover2010_share gfc_forestcover2012_share  tmf_forest_share_2010 tmf_forest_share_2000 tmf_forest_share_2009 tmf_forest_share_2011 tmf_forest_share_2012 {
      gen `var'_sq=`var'*`var'
      }

*Gen additional variables
      bysort poly_id: asgen gavshare_2000_poly_id = gavshare_2000 , weights(gav_totalpixel) 
      gen active_concession=0 if gavshare_2000_poly_id>0.01      
      *With less then 1% of OP plating btw 2000 and 2019
      bysort poly_id: asgen gavshare_2019_poly_id = gavshare_2019 , weights(gav_totalpixel) 
      replace active_concession=0 if gavshare_2019_poly_id<0.01  
      replace active_concession=1 if active_concession==.
      gen pixarea_ha= pixarea_m2/10000
      gen pixarea_ha_peat= (pixarea_m2*peat_d)/10000
      gen pixarea_ha_nopeat= pixarea_m2/10000 if peat_d==0
      gen pixarea_ha_w_supply21_dum= pixarea_m2*wsupply_2021_dum/10000 
      gen pixarea_ha_w_supply20_dum= pixarea_m2*wsupply_2020_dum/10000 
      gen pixarea_ha_gav= (pixarea_m2* gavshare_2019 )/10000
      
save "04_master/master_data_v2.dta", replace

*PANEL DATASET
use "04_master/master_data_v2.dta", clear
      *Drop deals with more than 1% OP already in 2000
      drop if gavshare_2000_poly_id>0.01      
      *With less then 1% of OP plating btw 2000 and 2019
      drop if gavshare_2019_poly_id<0.01  

      keep opncumshare_* tmf_forest_share_* gav* gfc_treeloss_totalpixel gfc_treeloss2* opshare_* op_exp* op_totpixel tmf*  investorname1 operatingcompanyname peat_d peat_dis_km att_yield srtm_alti road_acc cntry_name admin_name regy2000 poly_id  pixel_id pixarea_m2 operat_com parent_com dealid investor_highinc investorname_highinc investorctry_highinc investor_highinc_woSGP rspo_mill wsupply_2021_dum
      drop  gav_1119_share gav_1219_share gav_0010_share gav_0019_share gav_2000_2000 gav_*_*  gavshare_*_poly_id gav_0019  gav_0010 gav_1119 gav_1219      
      drop regy2000 tmf_treeloss0021 tmf_treeloss1121 tmf_treeloss1221 tmf_treeloss1321 tmf_treeloss0010     
      drop tmf_treeloss19871987-tmf_treeloss19872021 *_sq *_deal gav_0919 gav_0008
      drop tmf_treeloss8721  tmf_notreeloss tmf_treeloss1121_share tmf_treeloss1221_share tmf_treeloss1321_share tmf_treeloss0010_share tmf_treeloss0021_share
      drop tmf_treeloss0008_share tmf_treeloss0921_share tmf_treeloss0921 tmf_treeloss0008 tmf_treeloss1021 tmf_treeloss1021_share gav_1019
      reshape long tmf_forest_share_ gav_ gavshare_ gavncumshare_ opncumshare_ tmf_treeloss opshare_ op_exp gfc_treeloss  , j(year) i( poly_id  pixel_id )
      rename tmf_forest_share_ tmf_forest_share
      gen tmf_treeloss_share= tmf_treeloss/ tmf_totalpixel
      gen op_exp_share = op_exp / op_totpixel 
      rename opshare_ op_share

save "04_master/master_data_panel_v2.dta", replace


